---
title: "poq_margins"
output: html_document
date: "2025-01-18"
---

```{r, include=FALSE, fig.width=6, fig.height=5, results='asis'}
# remove all objects
rm(list=ls())

# Unload all packages 


# Add packages #not all are in use
pacman::p_load(
  here,
  dplyr,
  tidyverse, #dplyr, readr, etc.
  data.table, #fread() 
  foreign, #load data types including stata .dta files 
  magrittr, #%<>% operator
  skimr, #for summerising
  lubridate, #dates
  knitr,
  kableExtra,
  janitor,
  ggplot2,
  hablar,
  gplots,
  multcomp,
  broom,
  sjPlot,
  sjlabelled,
  sjmisc
)


## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summariezed
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
    library(plyr)

    # New version of length which can handle NA's: if na.rm==T, don't count them
    length2 <- function (x, na.rm=FALSE) {
        if (na.rm) sum(!is.na(x))
        else       length(x)
    }

    # This does the summary. For each group's data frame, return a vector with
    # N, mean, and sd
    datac <- ddply(data, groupvars, .drop=.drop,
      .fun = function(xx, col) {
        c(N    = length2(xx[[col]], na.rm=na.rm),
          mean = mean   (xx[[col]], na.rm=na.rm),
          sd   = sd     (xx[[col]], na.rm=na.rm)
        )
      },
      measurevar
    )

    # Rename the "mean" column    
    datac <- rename(datac, c("mean" = measurevar))

    datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean

    # Confidence interval multiplier for standard error
    # Calculate t-statistic for confidence interval: 
    # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
    ciMult <- qt(conf.interval/2 + .5, datac$N-1)
    datac$ci <- datac$se * ciMult

    return(datac)
}
```


```{r, include=F}

here::i_am("poq/poq_margins.Rmd")
igs_oct_23<-haven::read_dta(here("poq/data/igs_oct_23.dta"))
load(here("poq/data/da_clean.RData"))
load(here("poq/data/lucid_edit.RData"))
#remove non attentive respondents
lucid_edit %<>%
  mutate(duration_in_seconds=as.numeric(duration_in_seconds)) %>%
  filter(duration_in_seconds>180 | q389_1==100)

# create "progressives" dfs
da_prog <- subset(da_edit, prog1 == 1 | prog2 ==1) #total: 591 "progressives"
da_no_prog <- subset(da_edit, (prog1 < 1 & is.na(prog2)) |  (prog2 < 1 & is.na(prog1)) | (prog2 < 1 & prog1 < 1)) #total 255

da_edit %<>% mutate(prog_dummy = 
                       case_when(
                         prog > 0.5 ~ "progressive",
                         prog < 0.5 ~ "not prog",
                         prog == 0.5 ~ "somewhat prog"
                       ))

# separate "Recallers" from non
da_recall <- subset(da_edit, out_vote == 1) #total: 409 support recall
da_recall %<>% mutate(prog_binary = case_when(
  prog1 == 1 | prog2 == 1 ~ 1,
  TRUE ~ 0
))

da_no <- subset(da_edit, out_vote == 0) # total 457 oppose recall



highlight <- subset(da_recall, prog_binary ==1) #progressive voters who still voted for the recall ## total: 172 supported recall and some progressive agenda.
prog_norecall <- subset(da_prog, out_vote == 0) #total: 419 progressives who supported the DA (70% of progs supported the DA)
no_prog_norecall <- subset(da_no_prog, out_vote == 0) #total: 26 non-progressives who supported the DA (10% of non-progs supported the DA)
no_highlight <- subset(da_recall, prog_binary ==0) 



highlight %<>% mutate(out_vote = "Conflicted Progressives")

```

# IGS data

```{r}
igs_oct_23 %<>%
  mutate(q3_coded = case_when(
    Q3 == 1 ~ "more extensive, more intensive",
    Q3 == 2 ~ "more extensive, less intensive",
    Q3 == 3 ~ "less extensive, more intensive",
    Q3 == 4 ~ "less extensive, less intensive",
    TRUE ~ NA_character_ # Handle any other cases if needed
  ))

igs_oct_23 %<>% filter(!is.na(q3_coded))
chisq.test(table(igs_oct_23$q3_coded))
# the Chi-squared test result indicates that there is a statistically significant difference among the categories overall, with a very small p-value.

table_counts <- table(igs_oct_23$q3_coded)
table_prop <- prop.table(table_counts)
#weighted
table1_weighted_all <- with(igs_oct_23, tapply(w1, q3_coded, sum, na.rm=TRUE))
total_weight_all <- sum(igs_oct_23$w1)
table1_prop_weighted_all <- table1_weighted_all / total_weight_all
df_weighted_all <- data.frame(Category = names(table1_prop_weighted_all),
                          Proportion = as.vector(table1_prop_weighted_all))
weighted_plot_all <- ggplot(df_weighted_all, aes(x = Category, y = Proportion)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_errorbar(aes(ymin = Proportion - 1.96 * sqrt((Proportion * (1 - Proportion)) / total_weight_all),
                    ymax = Proportion + 1.96 * sqrt((Proportion * (1 - Proportion)) / total_weight_all)),
                width = 0.2) +
  labs(title = "CA - Proportions with 95% CI", x = "Category", y = "Proportion") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave("weighted_plot_all.png", plot = weighted_plot_all, width = 8, height = 6, dpi = 300)




### by region/SF


# BAY
igs_bay <- subset(igs_oct_23, region == 1)
table_counts_bay <- table(igs_bay$q3_coded)
table_prop_bay <- prop.table(table_counts_bay)
df_unweighted_bay <- data.frame(Category = names(table_prop_bay),
                             Proportion = as.vector(table_prop_bay))
unweighted_plot_bay <- ggplot(df_unweighted_bay, aes(x = Category, y = Proportion)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_errorbar(aes(ymin = Proportion - 1.96 * sqrt((Proportion * (1 - Proportion)) / sum(table_counts_bay)),
                    ymax = Proportion + 1.96 * sqrt((Proportion * (1 - Proportion)) / sum(table_counts_bay))),
                width = 0.2) +
  labs(title = "BAY: Proportions with 95% CI", x = "Category", y = "Proportion") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave("unweighted_plot_bay.png", plot = unweighted_plot_bay, width = 8, height = 6, dpi = 300)



# SF
igs_sf <- subset(igs_oct_23, CNTY == 38)
table_counts_sf <- table(igs_sf$q3_coded)
table_prop_sf <- prop.table(table_counts_sf)
df_unweighted_sf <- data.frame(Category = names(table_prop_sf),
                             Proportion = as.vector(table_prop_sf))
unweighted_plot_sf <- ggplot(df_unweighted_sf, aes(x = Category, y = Proportion)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_errorbar(aes(ymin = Proportion - 1.96 * sqrt((Proportion * (1 - Proportion)) / sum(table_counts_sf)),
                    ymax = Proportion + 1.96 * sqrt((Proportion * (1 - Proportion)) / sum(table_counts_sf))),
                width = 0.2) +
  labs(title = "SF: Proportions with 95% CI", x = "Category", y = "Proportion") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave("unweighted_plot_sf.png", plot = unweighted_plot_sf, width = 8, height = 6, dpi = 300)



###############################################################################################################
# To specifically test whether the "more extensive, less intensive" category is significantly different from the "less extensive, more intensive" category, I perform a post-hoc test.  a simple and straightforward way is to conduct a pairwise Chi-square test or a Fisher’s Exact Test for these two specific categories. 

# Create a subset for the two categories
subset_data <- subset(igs_oct_23, q3_coded %in% c("more extensive, less intensive", "less extensive, more intensive"))

# Create a contingency table
contingency_table <- table(subset_data$q3_coded)

# Perform Fisher's Exact Test if counts are low, else use Chi-square
if (any(contingency_table < 5)) {
    test_result <- fisher.test(contingency_table)
} else {
    test_result <- chisq.test(contingency_table)
}

test_result
# result indicates a statistically significant difference between the "more extensive, less intensive" and "less extensive, more intensive" categories. 

# To specifically test whether the three progressive categories are significantly different, I perform a post-hoc test. a simple and straightforward way is to conduct a pairwise Chi-square test or a Fisher’s Exact Test for these two specific categories. 

# Create a subset for the three categories
subset_data2 <- subset(igs_oct_23, q3_coded %in% c("more extensive, less intensive", "less extensive, more intensive", "less extensive, less intensive"))

# Create a contingency table
contingency_table <- table(subset_data2$q3_coded)

# Perform Fisher's Exact Test if counts are low, else use Chi-square
if (any(contingency_table < 5)) {
    test_result <- fisher.test(contingency_table)
} else {
    test_result <- chisq.test(contingency_table)
}

test_result
# result indicates a statistically significant difference between the "more extensive, less intensive" and "less extensive, more intensive" categories. 


```


## Table 7
```{r}
# is the difference between the effect of experimental condition X in group A and group B significant?
## group A: out_vote == 0
## group B: out_vote == 1
## outcome: out1
## groups: group1

# step 1: Subset the data for the specific version of the survey question you want to analyze
domore_data <- da_edit[da_edit$group1 == "do more", ]
doless_data <- da_edit[da_edit$group1 == "do less", ]
dodiff_data <- da_edit[da_edit$group1 == "do diff", ]

# step 2: Create two subgroups within each group
domore_vote1 <- domore_data[domore_data$out_vote == 1, ]
domore_vote0 <- domore_data[domore_data$out_vote == 0, ]

doless_vote1 <- doless_data[doless_data$out_vote == 1, ]
doless_vote0 <- doless_data[doless_data$out_vote == 0, ]

dodiff_vote1 <- dodiff_data[dodiff_data$out_vote == 1, ]
dodiff_vote0 <- dodiff_data[dodiff_data$out_vote == 0, ]

#step 3: Perform a two-sample t-test to check if the means of out1 are significantly different between the two subgroups. The t-test will assume equal variances by default, but you can also perform a Welch's t-test if the variances are unequal.
domore_test <- t.test(domore_vote1$out1, domore_vote0$out1)
doless_test <- t.test(doless_vote1$out1, doless_vote0$out1)
dodiff_test <- t.test(dodiff_vote1$out1, dodiff_vote0$out1)
#Note that this analysis assumes that the out1 variable is approximately normally distributed within each subgroup. Alternatively, I may need to use a non-parametric alternative, such as the Wilcoxon rank-sum test (wilcox.test).
# 
# wilcox.test(domore_vote1$out1, domore_vote0$out1)
# wilcox.test(doless_vote1$out1, doless_vote0$out1)
# wilcox.test(dodiff_vote1$out1, dodiff_vote0$out1)

# Load necessary libraries
library(xtable)


# Compile results into a data frame
results_rev2 <- data.frame(
  Group = c("Get Tough", "Reduce Extent", "Reduce Intensity"),
  tStatistic = c(domore_test$statistic, doless_test$statistic, dodiff_test$statistic),
  pValue = c(domore_test$p.value, doless_test$p.value, dodiff_test$p.value),
  LowerCI = c(domore_test$conf.int[1], doless_test$conf.int[1], dodiff_test$conf.int[1]),
  UpperCI = c(domore_test$conf.int[2], doless_test$conf.int[2], dodiff_test$conf.int[2]),
  N1 = c(length(domore_vote1$out1), length(doless_vote1$out1), length(dodiff_vote1$out1)),
  N0 = c(length(domore_vote0$out1), length(doless_vote0$out1), length(dodiff_vote0$out1))
)

# Convert results to a LaTeX table using xtable
latex_table <- xtable(results_rev2, caption = "Statistical Results of the Experimental Conditions Across Groups", label = "tab:results_rev2", align = "lccccccc")
print(latex_table, include.rownames = FALSE, hline.after = NULL, booktabs = TRUE, sanitize.text.function = function(x){x})


```

# Table 8
```{r}

da_recall$group1 <- relevel(da_recall$group1, ref= "do more")
da_no$group1 <- relevel(da_no$group1, ref= "do more")
highlight$group1 <- relevel(highlight$group1, ref= "do more")
recall_models_new<-list("(1) Recall Proponents" = estimatr::lm_robust(out1 ~ group1
                               , 
                               data = da_recall,
                               clusters = response_id),
             "(2) Recall Opponents" = estimatr::lm_robust(out1 ~ group1
                               , 
                               data = da_no,
                               clusters = response_id),
"(3) Conflicted Progressives" = estimatr::lm_robust(out1 ~ group1 
                               , 
                               data = highlight,
                               clusters = response_id))





modelsummary::modelsummary(recall_models_new, output = "latex", title = 'Linear Regression Model Predicting Candidate Support Under Each Treatment Condition (compared to "get-tough" condition)',
                           coef_map = c("group1do less" = "Lower extensive margin",
                      "group1do diff" = "Lower intensive margin",
                      "group1do more"  ="Get tough"
                      ),
                           gof_map = c("nobs", "r.squared", "fstatistic"),
             statistic = c("conf.int",     
                           "t = {statistic}",
                           "p = {p.value}"),
             notes = "All models report the result of the effect of the experimental conditions on supporting a hypothetical DA. Standrd errors are clustered at the respondent's level.")


```

# Figure 4
```{r}
tmp_amy2 <- subset(da_edit, !is.na(out_vote))
tmp_amy2 <- subset(tmp_amy2, !is.na(out1))
da_sum <- summarySE(tmp_amy2, measurevar="out1", groupvars=c("out_vote","group1"))
da_sum %<>%
  mutate(out_vote = case_when(
    out_vote == 0 ~ "against recall",
    out_vote == 1 ~ "for recall"
  ))
lvl_ordr <- c("do more", "do diff", "do less")

# Setting factor levels and labels for clarity
da_sum$group1 <- factor(da_sum$group1, levels = lvl_ordr, labels = c("Get-Tough", "Reduce Intensity", "Reduce Extent"))

# Creating the plot
p <- ggplot(subset(da_sum, group1 != "control"), aes(x=group1, y=out1, group=out_vote, color=out_vote)) +
  geom_errorbar(aes(ymin=out1-ci, ymax=out1+ci), width=.1, color="grey30") +
  stat_summary(geom="line", aes(linetype=out_vote), size=1.5) +
  geom_point(size=4, shape=21, fill="white", aes(color=out_vote)) +
  geom_hline(yintercept=0.5, linetype="dotted", size=1.2, color="grey") +
  scale_x_discrete() +
  xlab("Condition") + ylab("Political Support (Mean)") +
  scale_color_manual(values=c("darkblue", "darkred"), labels=c("No on Recall", "Yes on Recall")) +
  scale_linetype_manual(values=c("solid", "dotted"), labels=c("No on Recall", "Yes on Recall")) +
  ggtitle("The Effect of Justice Dimensions on Support for Candidate Breakdown by Recall Vote Choice") +
  theme_minimal() +
  theme(legend.position = "bottom", legend.direction = "horizontal",
        plot.background = element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.border = element_blank())

# Saving the plot
ggsave("updated_sf_break.png", plot = p, width = 12, height = 7, units = "in")


```

#Figure 3
```{r}

#just conflicted progressives
tmp_highlight <- subset(highlight, !is.na(out1))

da_sum <- summarySE(tmp_highlight, measurevar="out1", groupvars=c("group1"))
lvl_ordr <- c("do more", "do diff", "do less")


# Setting factor levels and labels for clarity
da_sum$group1 <- factor(da_sum$group1, levels = lvl_ordr, labels = c("Get-Tough", "Reduce Intensity", "Reduce Extent"))

# Creating the plot
p <- ggplot(subset(da_sum, group1 != "Control"), aes(x=group1, y=out1)) +
  geom_errorbar(aes(ymin=out1-ci, ymax=out1+ci), width=.1, color="grey30") +
  stat_summary(geom="line", aes(group=1), size=1.5, color="darkmagenta") +
  geom_point(size=4, shape=21, color="darkmagenta", fill="white") +
  geom_hline(yintercept=0.5, linetype="dotted", size=1.2, color="grey") +
  scale_x_discrete() +
  xlab("Condition") + ylab("Political Support (Mean)") +
  ggtitle("The Effect of Justice Dimensions on Support for 'Conflicted Progressives'") +
  theme_minimal() +
  theme(legend.position = "none",
        plot.background = element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.border = element_blank())

# Saving the plot
ggsave("updated_sf_conflict.png", plot = p, width = 12, height = 7, units = "in")


```

#Figure 5,6,7
```{r}

library(randomForest)
require(caTools)
# I'll use the da_edit data because I want to predict out_vote and then apply it to lucid data
match_da <- dplyr::select(da_edit, out_vote, 
                          out2, sal, puni_t, prog, sym, rr) 

#prepare the data
match_da <- transform(
  match_da,
  out_vote=as.factor(out_vote)
)

summary(match_da)
sapply(match_da, class)
colSums(is.na(match_da))
match_da %<>% na.omit()


# The performance on the validation set gives you an idea of how well the model might generalize to new, unseen data, such as the second dataset:
sample = sample.split(match_da$out_vote, SplitRatio = .9)
train = subset(match_da, sample == TRUE)
test  = subset(match_da, sample == FALSE)
dim(train)
dim(test)


set.seed(6969)
rf <- randomForest(
   out_vote ~ out2+ sal+ puni_t+ prog+ sym+ rr,
   ntree = 2000,
   mtry=1,
   data=train
 )
rf 
importance(rf)
varImpPlot(rf)

test$out_vote_rf <- predict(rf, newdata = test)
table(test$out_vote,test$out_vote_rf)
library(ROCR)
pred1=predict(rf,type = "prob")
perf = prediction(pred1[,2], train$out_vote)
auc = performance(perf, "auc") # 0.7 to 0.8 is considered acceptable, 0.8 to 0.9 is considered excellent, and more than 0.9 is considered outstanding.
pred3 = performance(perf, "tpr","fpr")
plot(pred3,main="ROC Curve for Random Forest",col=2,lwd=2)
abline(a=0,b=1,lwd=2,lty=2,col="gray")
Metrics::rmse(test$out_vote, predict(rf, test))



##### now train on the full sample and apply to lucid #####
rf_df_puni_diff <- data.frame(matrix(ncol= 7, nrow=100))
colnames(rf_df_puni_diff) <- c("mean_x", "mean_y", "p_val", "conf_int_low", "conf_int_high", "statistic", "est_treat_eff") 

rf_df_doless_diff <- data.frame(matrix(ncol= 7, nrow=100))
colnames(rf_df_doless_diff) <- c("mean_x", "mean_y", "p_val", "conf_int_low", "conf_int_high", "statistic", "est_treat_eff")

rf_df_domore_diff <- data.frame(matrix(ncol= 7, nrow=100))
colnames(rf_df_domore_diff) <- c("mean_x", "mean_y", "p_val", "conf_int_low", "conf_int_high", "statistic", "est_treat_eff")

rf_df_prog_diff <- data.frame(matrix(ncol= 7, nrow=100))
colnames(rf_df_prog_diff) <- c("mean_x", "mean_y", "p_val", "conf_int_low", "conf_int_high", "statistic", "est_treat_eff") 


rf_df_prog_puni <- data.frame(matrix(ncol= 7, nrow=100))
colnames(rf_df_prog_puni) <- c("mean_x", "mean_y", "p_val", "conf_int_low", "conf_int_high", "statistic", "est_treat_eff") 

for (i in 1:100){
rf2 <- randomForest(
  out_vote ~ out2+ sal+ puni_t+ prog+rr+ sym,# 
  ntree = 2000,
  mtry = 1,
  data=match_da
)
lucid_edit$out_vote_rf <- predict(rf2, newdata = lucid_edit)
lucid_edit_rf <-subset(lucid_edit, !is.na(out_vote_rf))


ttest1<-t.test(subset(lucid_edit_rf, out_vote_rf==1 & group1=="do diff")$out1,subset(lucid_edit_rf, out_vote_rf==1 & group1=="do less")$out1)
rf_df_puni_diff$mean_x[i]<-as.numeric(ttest1$estimate["mean of x"])
rf_df_puni_diff$mean_y[i]<-as.numeric(ttest1$estimate["mean of y"])
rf_df_puni_diff$p_val[i]<-as.numeric(ttest1$p.value)
rf_df_puni_diff$conf_int_low[i]<-as.numeric(ttest1$conf.int[1])
rf_df_puni_diff$conf_int_high[i]<-as.numeric(ttest1$conf.int[2])
rf_df_puni_diff$statistic[i]<-as.numeric(ttest1$statistic["t"])
rf_df_puni_diff$est_treat_eff[i]<-rf_df_puni_diff$mean_x[i]-rf_df_puni_diff$mean_y[i]

ttest2<-t.test(subset(lucid_edit_rf, out_vote_rf==1 & group1=="do less")$out1,subset(lucid_edit_rf, out_vote_rf==0 & group1=="do less")$out1)
rf_df_doless_diff$mean_x[i]<-as.numeric(ttest2$estimate["mean of x"])
rf_df_doless_diff$mean_y[i]<-as.numeric(ttest2$estimate["mean of y"])
rf_df_doless_diff$p_val[i]<-as.numeric(ttest2$p.value)
rf_df_doless_diff$conf_int_low[i]<-as.numeric(ttest2$conf.int[1])
rf_df_doless_diff$conf_int_high[i]<-as.numeric(ttest2$conf.int[2])
rf_df_doless_diff$statistic[i]<-as.numeric(ttest2$statistic["t"])
rf_df_doless_diff$est_treat_eff[i]<-rf_df_doless_diff$mean_x[i]-rf_df_doless_diff$mean_y[i]

ttest3<-t.test(subset(lucid_edit_rf, out_vote_rf==0 & group1=="do diff")$out1,subset(lucid_edit_rf, out_vote_rf==0 & group1=="do more")$out1)
rf_df_prog_diff$mean_x[i]<-as.numeric(ttest3$estimate["mean of x"])
rf_df_prog_diff$mean_y[i]<-as.numeric(ttest3$estimate["mean of y"])
rf_df_prog_diff$p_val[i]<-as.numeric(ttest3$p.value)
rf_df_prog_diff$conf_int_low[i]<-as.numeric(ttest3$conf.int[1])
rf_df_prog_diff$conf_int_high[i]<-as.numeric(ttest3$conf.int[2])
rf_df_prog_diff$statistic[i]<-as.numeric(ttest3$statistic["t"])
rf_df_prog_diff$est_treat_eff[i]<-rf_df_prog_diff$mean_x[i]-rf_df_prog_diff$mean_y[i]

ttest4<-t.test(subset(lucid_edit_rf, out_vote_rf==1 & group1=="do more")$out1,subset(lucid_edit_rf, out_vote_rf==0 & group1=="do more")$out1)
rf_df_domore_diff$mean_x[i]<-as.numeric(ttest4$estimate["mean of x"])
rf_df_domore_diff$mean_y[i]<-as.numeric(ttest4$estimate["mean of y"])
rf_df_domore_diff$p_val[i]<-as.numeric(ttest4$p.value)
rf_df_domore_diff$conf_int_low[i]<-as.numeric(ttest4$conf.int[1])
rf_df_domore_diff$conf_int_high[i]<-as.numeric(ttest4$conf.int[2])
rf_df_domore_diff$statistic[i]<-as.numeric(ttest4$statistic["t"])
rf_df_domore_diff$est_treat_eff[i]<-rf_df_domore_diff$mean_x[i]-rf_df_domore_diff$mean_y[i]

ttest5<-t.test(subset(lucid_edit_rf, out_vote_rf==0 & group1=="do diff")$out1,subset(lucid_edit_rf, out_vote_rf==1 & group1=="do diff")$out1)
rf_df_prog_puni$mean_x[i]<-as.numeric(ttest5$estimate["mean of x"])
rf_df_prog_puni$mean_y[i]<-as.numeric(ttest5$estimate["mean of y"])
rf_df_prog_puni$p_val[i]<-as.numeric(ttest5$p.value)
rf_df_prog_puni$conf_int_low[i]<-as.numeric(ttest5$conf.int[1])
rf_df_prog_puni$conf_int_high[i]<-as.numeric(ttest5$conf.int[2])
rf_df_prog_puni$statistic[i]<-as.numeric(ttest5$statistic["t"])
rf_df_prog_puni$est_treat_eff[i]<-rf_df_prog_puni$mean_x[i]-rf_df_prog_puni$mean_y[i]
}

#tables output
library(knitr)
library(kableExtra)
library(dplyr)
rf_df_prog_puni$tab <- "Group differences: Intensive margin"
rf_df_domore_diff$tab <- "Group differences: Get-tough"
rf_df_prog_diff$tab <- "Intensive and Get-tough margins differences: progressive groups"
rf_df_doless_diff$tab <- "Group differences: Extensive margin"
rf_df_puni_diff$tab <- "Extensive and Intensive margins differences: punitive groups"

combined_data <- bind_rows(rf_df_prog_puni, rf_df_domore_diff, rf_df_doless_diff, rf_df_prog_diff, rf_df_puni_diff)

table_latex <- combined_data %>%
  kable("latex", booktabs = TRUE, longtable = TRUE, caption = "Summary of all treatment effects estimations (100 RF models)") %>%
  kable_styling(latex_options = c("scale_down", "hold_position")) %>%
  add_header_above(c("Estimated Means" = 2, "Statistics" = 4, "Treatment Effect" = 1, "Dataset" = 1), escape = FALSE) %>%
  column_spec(8, bold = TRUE, border_left = TRUE) 


sink("table_output.tex")
cat(table_latex)
sink()


puni_diff <- ggplot(data = rf_df_puni_diff, aes(est_treat_eff, rank(est_treat_eff, ties.method = "first"))) +
  geom_point(alpha = 1) +
  geom_errorbarh(aes(xmin = conf_int_low, xmax = conf_int_high),
                 alpha = .6,
                 height = 0) +
  geom_vline(xintercept = 0, lty = 2, size = 1) +
  xlab("Estimated Treatment Effect") +
  #facet_wrap( ~ treatment) +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    strip.background = element_blank(),
    panel.grid.minor.y = element_blank(), 
    panel.grid.major.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )+ggtitle("Punitive respondents - Difference between
            lowering intensive and extensive margins") 

prog_diff <- ggplot(data = rf_df_prog_diff, aes(est_treat_eff, rank(est_treat_eff, ties.method = "first"))) +
  geom_point(alpha = 1) +
  geom_errorbarh(aes(xmin = conf_int_low, xmax = conf_int_high),
                 alpha = .6,
                 height = 0) +
  geom_vline(xintercept = 0, lty = 2, size = 1) +
  xlab("Estimated Treatment Effect") +
  #facet_wrap( ~ treatment) +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    strip.background = element_blank(),
    panel.grid.minor.y = element_blank(), 
    panel.grid.major.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +ggtitle("Progressive respondents - Difference between 
            lowering intensive margin and get-tough") 


prog_puni <- ggplot(data = rf_df_prog_puni, aes(est_treat_eff, rank(est_treat_eff, ties.method = "first"))) +
  geom_point(alpha = 1) +
  geom_errorbarh(aes(xmin = conf_int_low, xmax = conf_int_high),
                 alpha = .6,
                 height = 0) +
  geom_vline(xintercept = 0, lty = 2, size = 1) +
  xlab("Estimated Difference in support for Reduce Intensity") +
  #facet_wrap( ~ treatment) +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    strip.background = element_blank(),
    panel.grid.minor.y = element_blank(), 
    panel.grid.major.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +ggtitle("Difference between groups attitudes
             toward Reduce Intensity") 

do_less <- ggplot(data = rf_df_doless_diff, aes(est_treat_eff, rank(est_treat_eff, ties.method = "first"))) +
  geom_point(alpha = 1) +
  geom_errorbarh(aes(xmin = conf_int_low, xmax = conf_int_high),
                 alpha = .6,
                 height = 0) +
  geom_vline(xintercept = 0, lty = 2, size = 1) +
  xlab("Estimated Difference in support for Reduce Extent") +
  #facet_wrap( ~ treatment) +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    strip.background = element_blank(),
    panel.grid.minor.y = element_blank(), 
    panel.grid.major.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +ggtitle("Respondents have different attitudes
             toward Reduce Extent") 

do_more <- ggplot(data = rf_df_domore_diff, aes(est_treat_eff, rank(est_treat_eff, ties.method = "first"))) +
  geom_point(alpha = 1) +
  geom_errorbarh(aes(xmin = conf_int_low, xmax = conf_int_high),
                 alpha = .6,
                 height = 0) +
  geom_vline(xintercept = 0, lty = 2, size = 1) +
  xlab("Estimated Difference in support for Get-Tough") +
  #facet_wrap( ~ treatment) +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    strip.background = element_blank(),
    panel.grid.minor.y = element_blank(), 
    panel.grid.major.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +ggtitle("Respondents have different attitudes
             toward Get-Tough") 
g_top <- ggpubr::ggarrange(puni_diff, prog_diff, labels = rep("", 2), ncol = 2, nrow = 1)
g_top
ggsave("rf_top.png", width = 8, height = 4, units = "in")
prog_puni
ggsave("rf_middle.png", width = 4, height = 4, units = "in")
g_bottom <- ggpubr::ggarrange(do_less, do_more, labels = rep("", 2), ncol = 2, nrow = 1)
g_bottom
ggsave("rf_bottom.png", width = 8, height = 4, units = "in")

```


# Figur I.1
```{r}
load(here::here("analysis/data/ps_match.RData"))


tmp_lucid_ps <- subset(lucid_ps_match, !is.na(out_vote))
tmp_lucid_ps <- subset(tmp_lucid_ps, !is.na(out1))

da_sum <- summarySE(tmp_lucid_ps, measurevar="out1", groupvars=c("out_vote","group1"))
da_sum %<>%
  mutate(out_vote = case_when(
    out_vote == 0 ~ "against recall",
    out_vote == 1 ~ "for recall"
  ))
lvl_ordr <- c("do more", "do diff", "do less")
## Use 95% confidence interval 
ggplot(subset(da_sum, group1 != "control"), aes(x=factor(group1, level = lvl_ordr), y=out1, group = out_vote, colour=out_vote, group = 2)) + 
    geom_errorbar(aes(ymin=out1-ci, ymax=out1+ci), width=.1) +
    #geom_line() +
    stat_summary(geom = "line", size = 1.5) +
    geom_point(size=3, shape=21, fill="white") + # 21 is filled circle
    geom_hline(yintercept=0.5, linetype="dotted", size = 1.2) +
    xlab("Condition") +
  scale_x_discrete(labels=c("do more" = "Get-Tough", "do diff" = "Reduce Intensity",
                              "do less" = "Reduce Extent")) +
    ylab("Political support (mean)") +
    scale_colour_hue(name="Matched to:",    # Legend label, use darker colors
                     breaks=c("against recall", "for recall"),
                     labels=c("No on Recall", "Yes on Recall"),
                     l=40) +                    # Use darker colors, lightness=40
    ggtitle("The Effect of Justice Dimensions on Support for Candidate
            Break Down by Estimated Recall Vote Choice") +
 theme(legend.position = "bottom", legend.direction = "horizontal")
ggsave("lucid_ps_break.png", width = 12, height = 7, units = "in")

```




# Figure 2, G.1
```{r, results='asis'}


context_model<-list("All voters" = estimatr::lm_robust(out2 ~ group2*out_vote,
                               data = da_edit,
                               clusters = response_id))

modelsummary::modelsummary(context_model, output = "latex", title = "The effect of 'Chesa Boudin' on Progressive Policies Support",
                           coef_map = c("group2Chesa context" = "With Boudin Context",
                      "out_vote" = "Recall Support",
                      "group2Chesa context:out_vote"  ="Boudin Context X Recall Support"
                      ),
                           gof_map = c("nobs", "r.squared", "fstatistic"),
             statistic = c("conf.int",     
                           "t = {statistic}",
                           "p = {p.value}"),
             notes = "All models report the result of the effect of the experimental conditions on the average support to four progressive policies that Chesa Boudin implemented. Models 1 shows the interaction, and models 2, and 3 only the effect of the treatment condition on average support, by vote choice. Standrd errors are clustered at the respondent's level."
             )



# now using ggplot
lvl_ordr2 <- c("Chesa context","no Chesa context")
tmp3 <- subset(da_edit, !is.na(out_vote))
tmp3 <- subset(tmp3, !is.na(out2))

da_sum2 <- summarySE(tmp3, measurevar="out2", groupvars=c("out_vote","group2"))
da_sum2 %<>%
  mutate(out_vote = case_when(
    out_vote == 0 ~ "against recall",
    out_vote == 1 ~ "for recall"
  )) 
## Use 95% confidence interval 
ggplot(da_sum2, aes(x=factor(group2,level = lvl_ordr2),y=out2, group = out_vote, colour=out_vote, group = 2)) + 
    geom_errorbar(aes(ymin=out2-ci, ymax=out2+ci), width=.1) +
    #geom_line() +
    stat_summary(geom = "line", size = 1.5) +
  geom_point(size=3, shape=21, fill="white") + # 21 is filled circle
  geom_hline(yintercept=0.5, linetype="dotted", size = 1.2) +
  xlab("Condition") +
  scale_x_discrete(labels=c("Chesa context" = "W/Boudin Information", "no Chesa context" = "w/o Boudin Information")) +
    ylab("Policies support (mean)") +
    scale_colour_hue(name="Recall vote",    # Legend label, use darker colors
                     breaks=c("against recall", "for recall"),
                     labels=c("No on Recall", "Yes on Recall"),
                     l=40) +                    # Use darker colors, lightness=40
    ggtitle("The effect of 'Chesa' intervention on support for Chesa's policies")+
  theme(legend.position = "bottom", legend.direction = "horizontal")
ggsave("study3.png", width = 9.5, height = 7.5, units = "in")




######################################################################33
lvl_ordr2 <- c("Chesa context","no Chesa context")
no_highlight_tmp <- subset(no_highlight, !is.na(out2))
highlight_tmp <- subset(highlight, !is.na(out2))
da_no_tmp <- subset(da_no, !is.na(out2))

no_highlight_sum <- summarySE(no_highlight_tmp, measurevar="out2", groupvars=c("out_vote","group2"))
no_highlight_sum %<>%
  mutate(out_vote = "Other Recall Supporters")

highlight_sum <- summarySE(subset(highlight, !is.na(out2)), measurevar="out2", groupvars=c("out_vote","group2"))
highlight_sum %<>%
  mutate(out_vote = "Conflicted Progressives")

tmp_sum <- join(highlight_sum,no_highlight_sum, type = "full")

# Setting consistent levels and labels for factors
tmp_sum$group2 <- factor(tmp_sum$group2, levels = lvl_ordr2, labels = c("W/Boudin Information", "W/o Boudin Information"))

# Creating the plot
p <- ggplot(tmp_sum, aes(x=group2, y=out2, group=out_vote, shape=out_vote)) +
  geom_errorbar(aes(ymin=out2-ci, ymax=out2+ci), width=.1, color="grey30") +
  stat_summary(geom="line", aes(color=out_vote), size=0.8) +
  geom_point(aes(color=out_vote), size=4) +
  geom_hline(yintercept=0.5, linetype="dotted", size=1.2) +
  geom_text(aes(label = round(out2, 2)), hjust=-0.5, vjust=1, color="black") +
  annotate("text", x=1, y = 0.34, label="N=239") +
  annotate("text", x=1, y = 0.66, label="N=175") +
  scale_x_discrete() +
  scale_color_manual(values=c("darkmagenta", "darkred"), labels=c("Conflicted Progressives", "Other Recall Supporters")) +
  scale_shape_manual(values=c(17, 15)) +
  ggtitle("The Effect of 'Boudin' Information on Support for His Policies") +
  xlab("Condition") + ylab("Policy Support (Mean)") +
  theme_minimal() +
  theme(legend.position = "bottom", legend.direction = "horizontal",
        plot.background = element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.border = element_blank())

# Saving the plot
ggsave("updated_study3_2.png", plot = p, width = 9.5, height = 7.5, units = "in")



```


# Figure 1
```{r, results='asis'}

highlight %<>% mutate(out_vote = "Yes")


# Removing rows where 'prog' is NaN and refining the subset condition
refined_data <- da_edit %>%
  filter(!is.na(prog)) %>%
  filter(!(prog >= 0.5 & out_vote == 1)) %>%
  mutate(out_vote = case_when(out_vote == "0" ~ "No", out_vote == "1" ~ "Yes"))

# Creating the plot
p <- ggplot(refined_data, aes(x=prog, y=out_vote)) +
  geom_jitter(aes(color = as.factor(out_vote)), alpha = 0.6) +
  scale_color_manual(values = c("darkblue", "darkred")) +
  geom_jitter(data = highlight, aes(fill = as.factor(out_vote)), color = "darkmagenta", size = 3, shape = 21) +
  ggtitle("Voters on the Progressive Scale") +
  xlab("Progressive Ideology") + ylab("In Favor of Recall?") +
  labs(color = "Recall Vote", fill = "Conflicted Progressives") +
  theme_minimal() +
  theme(legend.position = "bottom", legend.direction = "horizontal",
        plot.background = element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.border = element_blank()) +
  annotate("text", x=seq(0, 1, by = 0.25), y = 2.5, 
           label=sapply(seq(0, 1, by = 0.25), function(x) paste0("N=", length(subset(refined_data, prog == x)$out_vote))), 
           hjust = 0.5, vjust = -0.5)

# Saving the plot
ggsave("updated_puzzle2.png", plot = p, width = 11, height = 8, units = "in")

```